/*
 ** $Id: llex.c,v 2.20.1.2 2009/11/23 14:58:22 roberto Exp $
 ** Lexical Analyzer
 ** See Copyright Notice in lua.h
 */


#include <ctype.h>
#include <locale.h>
#include <string.h>

#define llex_c
#define LUA_CORE

#include "lua/lua.h"

#include "lua/ldo.h"
#include "lua/llex.h"
#include "lua/lobject.h"
#include "lua/lparser.h"
#include "lua/lstate.h"
#include "lua/lstring.h"
#include "lua/ltable.h"
#include "lua/lzio.h"



/* fetch a character from the input stream `ls->z' into `ls->current';
   the expression's value is the character just stored */
#define next( ls ) ( ls->current = zgetc( ls->z ) )




/* true when the current character is a line feed or a carriage return */
#define currIsNewline( ls )	( ls->current == '\n' || ls->current == '\r' )


/* ORDER RESERVED */
/*
 ** Printable names of the tokens, indexed by `token - FIRST_RESERVED'
 ** (see luaX_token2str).  The first NUM_RESERVED entries are the ones
 ** luaX_init registers as reserved words; the remaining entries name
 ** operators and token classes.  The list is NULL-terminated.
 ** NOTE(review): the order presumably has to match the token enumeration
 ** declared in llex.h -- confirm before reordering or inserting entries.
 */
const char *const luaX_tokens [] =
{
	"and", "break", "do", "else", "elseif",
	"end", "false", "for", "function", "if",
	"in", "local", "nil", "not", "or", "repeat",
	"return", "then", "true", "until", "while",
	"..", "...", "==", ">=", "<=", "~=",
	"<number>", "<name>", "<string>", "<eof>",
	"<extop>", "<intop>",
	NULL
};


/* append the current character to the token buffer, then advance the input */
#define save_and_next( ls ) ( save( ls, ls->current ), next( ls ) )


/*
 ** Append character `c' to the lexer's token buffer.  When the buffer
 ** has no room for one more character it is first resized to twice its
 ** current size; a buffer that is already at least MAX_SIZET/2 long is
 ** reported as a lexical error instead.
 */
static void save ( LexState *ls, int c )
{
	Mbuffer *mb = ls->buff;
	if ( mb->n + 1 > mb->buffsize )
	{
		size_t doubled;
		/* doubling would not fit in a size_t */
		if ( mb->buffsize >= MAX_SIZET/2 )
			luaX_lexerror( ls, "lexical element too long", 0 );
		doubled = mb->buffsize * 2;
		luaZ_resizebuffer( ls->L, mb, doubled );
	}
	mb->buffer[mb->n] = cast( char, c );
	mb->n++;
}


/*
 ** Append every character of the NUL-terminated `string_' to the token
 ** buffer, in order.  The terminating NUL itself is not stored.
 */
static void save_string ( LexState *ls, const char *string_ )
{
	const char *p;

	/* walk up to the terminator: avoids calling strlen() on every pass
	   and comparing a signed index against a size_t */
	for ( p = string_; *p != '\0'; ++p )
	{
		save( ls, cast( int, *p ) );
	}
}


/*
 ** Append `string_' to the token buffer and store in `seminfo->ts' the
 ** string made from the whole current buffer content.  llex uses this
 ** to attach a name to the extended operator tokens.
 */
static void interpret_as( LexState *ls, SemInfo * seminfo, const char* string_ )
{
	const char *text;
	size_t length;

	save_string( ls, string_ );
	text = luaZ_buffer( ls->buff );
	length = luaZ_bufflen( ls->buff );
	seminfo->ts = luaX_newstring( ls, text, length );
}


/*
 ** Create a string for each of the first NUM_RESERVED entries of
 ** luaX_tokens, fix it so that it is never collected, and tag it with
 ** its 1-based position in the table as its `reserved' mark.
 */
void luaX_init ( lua_State *L )
{
	int idx = 0;
	while ( idx < NUM_RESERVED )
	{
		const char *word = luaX_tokens[idx];
		TString *ts = luaS_new( L, word );
		luaS_fix( ts );  /* reserved words are never collected */
		lua_assert( strlen( word )+1 <= TOKEN_LEN );
		ts->tsv.reserved = cast_byte( idx+1 );  /* 0 is kept for "not reserved" */
		idx++;
	}
}


/* size of the buffer luaX_lexerror uses for the chunk identification */
#define MAXSRC          80


/*
 ** Return a printable representation of `token'.  Tokens from
 ** FIRST_RESERVED upwards are looked up in luaX_tokens; smaller values
 ** are single characters, formatted as themselves or, for control
 ** characters, as their numeric code.
 */
const char *luaX_token2str ( LexState *ls, int token )
{
	if ( token >= FIRST_RESERVED )
		return luaX_tokens[token-FIRST_RESERVED];

	/* single-character token */
	lua_assert( token == cast( unsigned char, token ) );
	if ( iscntrl( token ) )
		return luaO_pushfstring( ls->L, "char( %d )", token );
	return luaO_pushfstring( ls->L, "%c", token );
}


/*
 ** Text used to describe `token' in an error message.  For names,
 ** strings and numbers it is the NUL-terminated content of the token
 ** buffer; any other token goes through luaX_token2str.
 */
static const char *txtToken ( LexState *ls, int token )
{
	if ( token == TK_NAME || token == TK_STRING || token == TK_NUMBER )
	{
		save( ls, '\0' );  /* terminate the buffer so it can be used as a C string */
		return luaZ_buffer( ls->buff );
	}
	return luaX_token2str( ls, token );
}


/*
 ** Build the message "<chunk id>:<line>: <msg>", append " near <token>"
 ** when `token' is non-zero, and throw LUA_ERRSYNTAX.
 */
void luaX_lexerror ( LexState *ls, const char *msg, int token )
{
	char chunkname[MAXSRC];
	const char *located;

	luaO_chunkid( chunkname, getstr( ls->source ), MAXSRC );
	located = luaO_pushfstring( ls->L, "%s:%d: %s", chunkname, ls->linenumber, msg );
	if ( token != 0 )
		luaO_pushfstring( ls->L, "%s near " LUA_QS, located, txtToken( ls, token ) );
	luaD_throw( ls->L, LUA_ERRSYNTAX );
}


/*
 ** Raise a lexical error with `msg', reported near the current token.
 */
void luaX_syntaxerror ( LexState *ls, const char *msg )
{
	const int current_token = ls->t.token;
	luaX_lexerror( ls, msg, current_token );
}


/*
 ** Create the string (`str', `l') and make sure the table `ls->fs->h'
 ** has an entry for it.  A new entry is set to a true boolean so that
 ** the string will not be collected, and a GC check is then run.
 */
TString *luaX_newstring ( LexState *ls, const char *str, size_t l )
{
	lua_State *L = ls->L;
	TString *interned = luaS_newlstr( L, str, l );
	TValue *slot = luaH_setstr( L, ls->fs->h, interned );  /* entry for `str' */
	if ( !ttisnil( slot ) )
		return interned;  /* already anchored */
	setbvalue( slot, 1 );  /* make sure `str' will not be collected */
	luaC_checkGC( L );
	return interned;
}


/*
 ** Skip one line break and count it.  A line break is a single `\n' or
 ** `\r', or a pair made of the two different characters.  Raises a
 ** syntax error when the line counter reaches MAX_INT.
 */
static void inclinenumber ( LexState *ls )
{
	const int first = ls->current;
	lua_assert( currIsNewline( ls ) );
	next( ls );  /* skip `\n' or `\r' */
	if ( currIsNewline( ls ) )
	{
		/* second half of `\n\r' or `\r\n'; a repeated char is a new line break */
		if ( ls->current != first )
			next( ls );
	}
	ls->linenumber += 1;
	if ( ls->linenumber >= MAX_INT )
		luaX_syntaxerror( ls, "chunk has too many lines" );
}


/*
 ** Prepare `ls' to read from stream `z' on behalf of state `L':
 ** reset the line counters, the look-ahead token and the decimal
 ** point, size the token buffer to LUA_MINBUFFER and read the first
 ** character.
 */
void luaX_setinput ( lua_State *L, LexState *ls, ZIO *z, TString *source )
{
	/* where the input comes from */
	ls->L = L;
	ls->z = z;
	ls->source = source;
	ls->fs = NULL;

	/* scanning state */
	ls->decpoint = '.';
	ls->linenumber = 1;
	ls->lastline = 1;
	ls->lookahead.token = TK_EOS;  /* no look-ahead token */

	luaZ_resizebuffer( ls->L, ls->buff, LUA_MINBUFFER );  /* initialize buffer */
	next( ls );  /* read first char */
}



/*
 ** =======================================================
 ** LEXICAL ANALYZER
 ** =======================================================
 */



/*
 ** If the current character is one of the characters in `set', save it,
 ** advance the input and return 1; otherwise leave the input untouched
 ** and return 0.
 */
static int check_next ( LexState *ls, const char *set )
{
	/* strchr() counts the terminating NUL of `set' as part of the string,
	   so a literal '\0' in the input has to be rejected explicitly */
	if ( ls->current == '\0' || !strchr( set, ls->current ) )
		return 0;
	save_and_next( ls );
	return 1;
}


/*
 ** Replace every occurrence of character `from' in the token buffer
 ** with `to'.
 */
static void buffreplace ( LexState *ls, char from, char to )
{
	char *text = luaZ_buffer( ls->buff );
	const size_t length = luaZ_bufflen( ls->buff );
	size_t i;
	for ( i = 0; i < length; i++ )
	{
		if ( text[i] == from )
			text[i] = to;
	}
}


/*
 ** Called when a numeral failed to convert: refresh the decimal point
 ** from the current locale, rewrite the buffer accordingly and convert
 ** again.  A second failure is reported as "malformed number".
 */
static void trydecpoint ( LexState *ls, SemInfo *seminfo )
{
	struct lconv *loc = localeconv();
	const char previous = ls->decpoint;

	if ( loc )
		ls->decpoint = loc->decimal_point[0];
	else
		ls->decpoint = '.';
	buffreplace( ls, previous, ls->decpoint );  /* try updated decimal separator */
	if ( luaO_str2d( luaZ_buffer( ls->buff ), &seminfo->r ) )
		return;  /* converted with the locale's separator */

	/* format error with correct decimal point: no more options */
	buffreplace( ls, ls->decpoint, '.' );  /* undo change ( for error message ) */
	luaX_lexerror( ls, "malformed number", TK_NUMBER );
}


/* LUA_NUMBER */
/*
 ** Read a numeral starting at the current digit and store its value in
 ** `seminfo->r'.  The scan takes digits and dots, an optional exponent
 ** mark with an optional sign, and then any trailing alphanumeric or
 ** `_' characters; whether the text is a valid number is left to
 ** luaO_str2d.
 */
static void read_numeral ( LexState *ls, SemInfo *seminfo )
{
	lua_assert( isdigit( ls->current ) );
	save_and_next( ls );  /* first digit */
	while ( isdigit( ls->current ) || ls->current == '.' )
		save_and_next( ls );
	if ( check_next( ls, "Ee" ) )  /* `E'? */
		check_next( ls, "+-" );  /* optional exponent sign */
	for ( ;; )
	{
		if ( !isalnum( ls->current ) && ls->current != '_' )
			break;
		save_and_next( ls );
	}
	save( ls, '\0' );
	buffreplace( ls, '.', ls->decpoint );  /* follow locale for decimal point */
	if ( luaO_str2d( luaZ_buffer( ls->buff ), &seminfo->r ) )
		return;
	/* format error: try to update decimal point separator */
	trydecpoint( ls, seminfo );
}


/*
 ** Scan a bracket followed by any number of `=' signs, saving all of
 ** them in the token buffer.  Returns the number of `=' signs when the
 ** next character is the same bracket again, and `-count - 1' (a
 ** negative value) otherwise.
 */
static int skip_sep ( LexState *ls )
{
	const int bracket = ls->current;
	int level;
	lua_assert( bracket == '[' || bracket == ']' );
	save_and_next( ls );
	for ( level = 0; ls->current == '='; level++ )
		save_and_next( ls );
	if ( ls->current == bracket )
		return level;
	return ( -level ) - 1;
}


/*
 ** Read the body of a long-bracket string or comment.  On entry the
 ** current character is the second `[' of the opening delimiter and
 ** `sep' is the number of `=' signs in it (as returned by skip_sep).
 ** When `seminfo' is non-NULL the text between the delimiters is stored
 ** in `seminfo->ts'; when it is NULL (long comment) the content is
 ** discarded as it is read.  Reaching the end of the input before the
 ** closing delimiter raises a lexical error.
 */
static void read_long_string ( LexState *ls, SemInfo *seminfo, int sep )
{
	int cont = 0;  /* nesting counter, only used by the LUA_COMPAT_LSTR code */
	( void )( cont );  /* avoid warnings when `cont' is not used */
	save_and_next( ls );  /* skip 2nd `[' */
	if ( currIsNewline( ls ) )  /* string starts with a newline? */
		inclinenumber( ls );  /* skip it */
	for ( ;; )
	{
		switch ( ls->current )
		{
			case EOZ:
				luaX_lexerror( ls, ( seminfo ) ? "unfinished long string" :
						"unfinished long comment", TK_EOS );
				break;  /* to avoid warnings */
#if defined( LUA_COMPAT_LSTR )
			case '[':
				{
					/* nested opening delimiter of the same level */
					if ( skip_sep( ls ) == sep )
					{
						save_and_next( ls );  /* skip 2nd `[' */
						cont++;
#if LUA_COMPAT_LSTR == 1
						if ( sep == 0 )
							luaX_lexerror( ls, "nesting of [[...]] is deprecated", '[' );
#endif
					}
					break;
				}
#endif
			case ']':
				{
					/* only a closing delimiter with the same number of `=' ends the string */
					if ( skip_sep( ls ) == sep )
					{
						save_and_next( ls );  /* skip 2nd `]' */
#if defined( LUA_COMPAT_LSTR ) && LUA_COMPAT_LSTR == 2
						cont--;
						if ( sep == 0 && cont >= 0 ) break;
#endif
						goto endloop;
					}
					break;
				}
			case '\n':
			case '\r':
				{
					/* every kind of line break is stored as a single `\n' */
					save( ls, '\n' );
					inclinenumber( ls );
					if ( !seminfo ) luaZ_resetbuffer( ls->buff );  /* avoid wasting space */
					break;
				}
			default:
				{
					/* ordinary character: kept for strings, dropped for comments */
					if ( seminfo ) save_and_next( ls );
					else next( ls );
				}
		}
	} endloop:
	/* the buffer holds both delimiters, each `2 + sep' characters long; strip them */
	if ( seminfo )
		seminfo->ts = luaX_newstring( ls, luaZ_buffer( ls->buff ) + ( 2 + sep ),
				luaZ_bufflen( ls->buff ) - 2*( 2 + sep ) );
}


/*
 ** Read a quoted string.  On entry the current character is the opening
 ** delimiter `del'; reading stops at the matching unescaped `del'.
 ** Backslash escapes are translated as they are read.  The text between
 ** the delimiters is stored in `seminfo->ts'.  A line break or the end
 ** of the input inside the string raises "unfinished string".
 */
static void read_string ( LexState *ls, int del, SemInfo *seminfo )
{
	save_and_next( ls );  /* keep the opening delimiter; it is stripped at the end */
	while ( ls->current != del )
	{
		switch ( ls->current )
		{
			case EOZ:
				luaX_lexerror( ls, "unfinished string", TK_EOS );
				continue;  /* to avoid warnings */
			case '\n':
			case '\r':
				luaX_lexerror( ls, "unfinished string", TK_STRING );
				continue;  /* to avoid warnings */
			case '\\':
				{
					int c;  /* character produced by the escape sequence */
					next( ls );  /* do not save the `\' */
					switch ( ls->current )
					{
						case 'a': c = '\a'; break;
						case 'b': c = '\b'; break;
						case 'f': c = '\f'; break;
						case 'n': c = '\n'; break;
						case 'r': c = '\r'; break;
						case 't': c = '\t'; break;
						case 'v': c = '\v'; break;
						/* backslash followed by a line break: store `\n' and count the line */
						case '\n':  /* go through */
						case '\r': save( ls, '\n' ); inclinenumber( ls ); continue;
						case EOZ: continue;  /* will raise an error next loop */
						default:
								  {
									  if ( !isdigit( ls->current ) )
										  save_and_next( ls );  /* handles \\, \", \', and \? */
									  else
									  {  /* \xxx */
										  /* decimal character code, at most three digits */
										  int i = 0;
										  c = 0;
										  do
										  {
											  c = 10*c + ( ls->current-'0' );
											  next( ls );
										  } while ( ++i<3 && isdigit( ls->current ) );
										  if ( c > UCHAR_MAX )
											  luaX_lexerror( ls, "escape sequence too large", TK_STRING );
										  save( ls, c );
									  }
									  continue;  /* input already advanced past the escape */
								  }
					}
					/* single-letter escapes: store the translated char, skip the letter */
					save( ls, c );
					next( ls );
					continue;
				}
			default:
				save_and_next( ls );
		}
	}
	save_and_next( ls );  /* skip delimiter */
	/* drop the two delimiters that were saved around the content */
	seminfo->ts = luaX_newstring( ls, luaZ_buffer( ls->buff ) + 1,
			luaZ_bufflen( ls->buff ) - 2 );
}


/*
 ** Scan and return the next token.  White space, line breaks and
 ** comments are skipped.  The return value is either a single character
 ** or one of the TK_* codes.  For TK_NAME, TK_STRING, TK_EXTOP and
 ** TK_INTOP the associated string is stored in `seminfo->ts'; for
 ** TK_NUMBER the value is stored in `seminfo->r' (by read_numeral).
 **
 ** Extended operators recognized here, with the name stored for each:
 **   -.  "__operator__sub"     +.  "__operator__add"
 **   *.  "__operator__time"    /.  "__operator__over"
 **   <=> "__operator__sync"    <<  "__operator__push"
 **   !=  "__operator__diff"    (all returned as TK_EXTOP)
 **   !   "__operator__fact"    (returned as TK_INTOP)
 */
static int llex ( LexState *ls, SemInfo *seminfo )
{
	luaZ_resetbuffer( ls->buff );  /* every token starts with an empty buffer */
	for ( ;; )
	{
		switch ( ls->current )
		{
			case '\n':
			case '\r':
				{
					inclinenumber( ls );
					continue;
				}
			case '-':
				{
					/* `-', `-.' or the start of a comment */
					next( ls );
					if ( ls->current != '-' )
					{
						if ( ls->current != '.' )
							return '-';
						else
						{
							next( ls );
							interpret_as( ls, seminfo, "__operator__sub" );
							return TK_EXTOP;
						}
					}
					/* else is a comment */
					next( ls );
					if ( ls->current == '[' )
					{
						int sep = skip_sep( ls );
						luaZ_resetbuffer( ls->buff );  /* `skip_sep' may dirty the buffer */
						if ( sep >= 0 )
						{
							read_long_string( ls, NULL, sep );  /* long comment */
							luaZ_resetbuffer( ls->buff );
							continue;
						}
					}
					/* else short comment */
					while ( !currIsNewline( ls ) && ls->current != EOZ )
						next( ls );
					continue;
				}
			case '+':
				{
					/* `+' or `+.' */
					next( ls );
					if ( ls->current != '.' ) return '+';
					next( ls );
					interpret_as( ls, seminfo, "__operator__add" );
					return TK_EXTOP;
				}
			case '*':
				{
					/* `*' or `*.' */
					next( ls );
					if ( ls->current != '.' ) return '*';
					next( ls );
					interpret_as( ls, seminfo, "__operator__time" );
					return TK_EXTOP;
				}
			case '/':
				{
					/* `/' or `/.' */
					next( ls );
					if ( ls->current != '.' ) return '/';
					next( ls );
					interpret_as( ls, seminfo, "__operator__over" );
					return TK_EXTOP;
				}
			case '[':
				{
					/* long string, plain `[', or a malformed `[=...' delimiter */
					int sep = skip_sep( ls );
					if ( sep >= 0 )
					{
						read_long_string( ls, seminfo, sep );
						return TK_STRING;
					}
					else if ( sep == -1 ) return '[';
					else luaX_lexerror( ls, "invalid long string delimiter", TK_STRING );
					/* NOTE(review): no `break' or `return' here; this relies on
					   luaX_lexerror (which ends in luaD_throw) not returning,
					   otherwise control would fall into case '=' */
				}
			case '=':
				{
					/* `=' or `==' */
					next( ls );
					if ( ls->current != '=' ) return '=';
					else
					{ next( ls ); return TK_EQ; }
				}
			case '<':
				{
					/* `<', `<=', `<=>' or `<<' */
					next( ls );
					if ( ls->current == '=' )
					{
						next( ls );
						if ( ls->current != '>' )
							return TK_LE;
						else
						{
							next( ls );
							interpret_as( ls, seminfo, "__operator__sync" );
							return TK_EXTOP;
						}
					}
					if ( ls->current == '<' )
					{
						next( ls );
						interpret_as( ls, seminfo, "__operator__push" );
						return TK_EXTOP;
					}
					return '<';
				}
			case '>':
				{
					/* `>' or `>=' */
					next( ls );
					if ( ls->current != '=' ) return '>';
					else
					{ next( ls ); return TK_GE; }
				}
			case '!':
				{
					/* `!' alone is TK_INTOP; `!=' is TK_EXTOP */
					next( ls );
						if ( ls->current != '=' )
						{
							interpret_as( ls, seminfo, "__operator__fact" );
							return TK_INTOP;
						}
						else
						{
							next( ls );
							interpret_as( ls, seminfo, "__operator__diff" );
							return TK_EXTOP;
						}
				}
			case '~':
				{
					/* `~' or `~=' */
					next( ls );
					if ( ls->current != '=' ) return '~';
					else
					{ next( ls ); return TK_NE; }
				}
			case '"':
			case '\'':
				{
					read_string( ls, ls->current, seminfo );
					return TK_STRING;
				}
			case '.':
				{
					/* `.', `..', `...' or a numeral that starts with a dot */
					save_and_next( ls );
					if ( check_next( ls, "." ) )
					{
						if ( check_next( ls, "." ) )
							return TK_DOTS;   /* ... */
						else return TK_CONCAT;   /* .. */
					}
					else if ( !isdigit( ls->current ) ) return '.';
					else
					{
						read_numeral( ls, seminfo );
						return TK_NUMBER;
					}
				}
			case EOZ:
				{
					return TK_EOS;
				}
			default:
				{
					if ( isspace( ls->current ) )
					{
						lua_assert( !currIsNewline( ls ) );
						next( ls );
						continue;
					}
					else if ( isdigit( ls->current ) )
					{
						read_numeral( ls, seminfo );
						return TK_NUMBER;
					}
					else if ( isalpha( ls->current ) || ls->current == '_' )
					{
						/* identifier or reserved word */
						TString *ts;
						do
						{
							save_and_next( ls );
						}
						while ( isalnum( ls->current )
								|| ls->current == '_' );
						ts = luaX_newstring( ls,
								luaZ_buffer( ls->buff ),
								luaZ_bufflen( ls->buff ) );
						/* `reserved' holds the 1-based index set by luaX_init */
						if ( ts->tsv.reserved > 0 )  /* reserved word? */
							return ts->tsv.reserved - 1 + FIRST_RESERVED;
						else
						{
							seminfo->ts = ts;
							return TK_NAME;
						}
					}
					else
					{
						int c = ls->current;
						next( ls );
						return c;  /* single-char tokens ( + - / ... ) */
					}
				}
		}
	}
}


/*
 ** Advance to the next token: record the line of the token being left
 ** in `lastline', then make `ls->t' the pending look-ahead token if
 ** there is one, or a freshly scanned token otherwise.
 */
void luaX_next ( LexState *ls )
{
	ls->lastline = ls->linenumber;
	if ( ls->lookahead.token == TK_EOS )
	{
		/* no look-ahead token pending: read next token */
		ls->t.token = llex( ls, &ls->t.seminfo );
		return;
	}
	ls->t = ls->lookahead;  /* use this one */
	ls->lookahead.token = TK_EOS;  /* and discharge it */
}


/*
 ** Scan one token ahead of the current one and keep it in
 ** `ls->lookahead'.  No look-ahead token may already be pending.
 */
void luaX_lookahead ( LexState *ls )
{
	int scanned;
	lua_assert( ls->lookahead.token == TK_EOS );
	scanned = llex( ls, &ls->lookahead.seminfo );
	ls->lookahead.token = scanned;
}

